TODO
library(dplyr)
library(ggplot2)
library(tidyr)
library(plotly)
library(knitr)
# Columns holding text values; they are excluded from `numeric_columns` below.
string_columns <- c(
  "Battery.Formula",
  "Working.Ion",
  "Formula.Charge",
  "Formula.Discharge"
)

# Load the battery records (path is relative to the working directory).
data <- read.csv(file = "mp_batteries.csv")
columns <- colnames(data)

# Every column that is neither a text column nor the Battery.ID column.
numeric_columns <- setdiff(columns, c(string_columns, "Battery.ID"))
Liczba wierszy: 4351.
Podsumowanie:
# Render the per-column summary of the data set as a table.
data %>%
  summary() %>%
  knitr::kable()
| Battery.ID | Battery.Formula | Working.Ion | Formula.Charge | Formula.Discharge | Max.Delta.Volume | Average.Voltage | Gravimetric.Capacity | Volumetric.Capacity | Gravimetric.Energy | Volumetric.Energy | Atomic.Fraction.Charge | Atomic.Fraction.Discharge | Stability.Charge | Stability.Discharge | Steps | Max.Voltage.Step | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Length:4351 | Length:4351 | Length:4351 | Length:4351 | Length:4351 | Min. : 0.00002 | Min. :-7.755 | Min. : 5.176 | Min. : 24.08 | Min. :-583.5 | Min. :-2208.1 | Min. :0.00000 | Min. :0.007407 | Min. :0.00000 | Min. :0.00000 | Min. :1.000 | Min. : 0.0000 | |
| Class :character | Class :character | Class :character | Class :character | Class :character | 1st Qu.: 0.01747 | 1st Qu.: 2.226 | 1st Qu.: 88.108 | 1st Qu.: 311.62 | 1st Qu.: 211.7 | 1st Qu.: 821.6 | 1st Qu.:0.00000 | 1st Qu.:0.086957 | 1st Qu.:0.03301 | 1st Qu.:0.01952 | 1st Qu.:1.000 | 1st Qu.: 0.0000 | |
| Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Median : 0.04203 | Median : 3.301 | Median : 130.691 | Median : 507.03 | Median : 401.8 | Median : 1463.8 | Median :0.00000 | Median :0.142857 | Median :0.07319 | Median :0.04878 | Median :1.000 | Median : 0.0000 | |
| NA | NA | NA | NA | NA | Mean : 0.37531 | Mean : 3.083 | Mean : 158.291 | Mean : 610.62 | Mean : 444.1 | Mean : 1664.0 | Mean :0.03986 | Mean :0.159077 | Mean :0.14257 | Mean :0.12207 | Mean :1.167 | Mean : 0.1503 | |
| NA | NA | NA | NA | NA | 3rd Qu.: 0.08595 | 3rd Qu.: 4.019 | 3rd Qu.: 187.600 | 3rd Qu.: 722.75 | 3rd Qu.: 614.4 | 3rd Qu.: 2252.3 | 3rd Qu.:0.04762 | 3rd Qu.:0.200000 | 3rd Qu.:0.13160 | 3rd Qu.:0.09299 | 3rd Qu.:1.000 | 3rd Qu.: 0.0000 | |
| NA | NA | NA | NA | NA | Max. :293.19322 | Max. :54.569 | Max. :2557.627 | Max. :7619.19 | Max. :5926.9 | Max. :18305.9 | Max. :0.90909 | Max. :0.993333 | Max. :6.48710 | Max. :6.27781 | Max. :6.000 | Max. :26.9607 |
10 najliczniej występujących wartości dla każdego atrybutu tekstowego
# For every text column: tabulate how often each distinct value occurs, print
# the ten most frequent values as a table, and keep the complete tally so that
# `counts` (columns: Value, Count, var) can be plotted afterwards.
#
# The per-column tallies are collected in a pre-sized list and combined once at
# the end instead of growing `counts` with rbind() on every iteration.
counts_by_column <- setNames(
  vector("list", length(string_columns)),
  string_columns
)
for (col in string_columns) {
  # Naming the grouping expression gives the value column a stable name,
  # rather than depending on the auto-generated "get(col)" label.
  col_counts <- data %>%
    count(Value = .data[[col]], name = "Count", sort = TRUE)
  col_counts$var <- col
  counts_by_column[[col]] <- col_counts

  top_10_counts <- col_counts %>%
    select(Value, Count) %>%
    slice(seq_len(10))
  print(knitr::kable(
    top_10_counts,
    caption = paste("10 najliczniej występujących wartości zmiennej", col)
  ))
  cat("\n")
}
counts <- bind_rows(counts_by_column)
| Value | Count |
|---|---|
| Li0-1V2OF5 | 19 |
| Li0-1CoPO4 | 18 |
| Li0-1FePO4 | 18 |
| Li0-3MnFeCo(PO4)3 | 17 |
| Li0-1MnPO4 | 15 |
| Li0-1V4OF11 | 15 |
| Li0-1V4O5F7 | 12 |
| Li0-1VF5 | 12 |
| Li0-1CrP2O7 | 11 |
| Li0-2MnP2O7 | 11 |
| Value | Count |
|---|---|
| Li | 2440 |
| Ca | 435 |
| Mg | 423 |
| Zn | 366 |
| Na | 309 |
| K | 107 |
| Al | 95 |
| Y | 93 |
| Rb | 50 |
| Cs | 33 |
| Value | Count |
|---|---|
| MnO2 | 49 |
| TiO2 | 47 |
| VO2 | 46 |
| CrO2 | 45 |
| CoO2 | 43 |
| NiO2 | 41 |
| FeO2 | 36 |
| FePO4 | 26 |
| WO2 | 25 |
| CoPO4 | 24 |
| Value | Count |
|---|---|
| LiCoPO4 | 19 |
| LiFePO4 | 19 |
| LiMnPO4 | 19 |
| LiV2OF5 | 19 |
| Li5Mn6(BO3)6 | 18 |
| Li3MnFeCo(PO4)3 | 17 |
| LiV4OF11 | 15 |
| Li2MnP2O7 | 14 |
| Li2FeSiO4 | 13 |
| LiCrPO4 | 12 |
# Histogram of occurrence counts, one panel per text column (`var`): the x axis
# is how many times a value occurs, the y axis how many distinct values occur
# that often. Each panel gets its own axis ranges.
ggplot(data = counts, mapping = aes(x = Count)) +
  geom_histogram(binwidth = 1, fill = "green", alpha = 0.7) +
  facet_wrap(~var, scales = "free") +
  ggtitle("Liczba wystąpień wartości dla zmiennej") +
  xlab("Liczba wystąpień") +
  ylab("Liczba różnych wartości") +
  theme_minimal()
Wartości puste:
# Count the missing entries in every numeric column.
# is.na() is TRUE for both NA and NaN, whereas is.nan() ignores NA, so the
# previous is.nan() check could not detect NA entries.
# vapply() guarantees a named integer vector even when `numeric_columns` is
# empty.
nan_counts <- vapply(
  numeric_columns,
  function(col) sum(is.na(data[[col]])),
  integer(1)
)
# The column keeps its original name `nan` so the table layout is unchanged.
nan_counts_df <- data.frame(
  nan = nan_counts
)
print(knitr::kable(nan_counts_df))
| | nan |
|---|---|
| Max.Delta.Volume | 0 |
| Average.Voltage | 0 |
| Gravimetric.Capacity | 0 |
| Volumetric.Capacity | 0 |
| Gravimetric.Energy | 0 |
| Volumetric.Energy | 0 |
| Atomic.Fraction.Charge | 0 |
| Atomic.Fraction.Discharge | 0 |
| Stability.Charge | 0 |
| Stability.Discharge | 0 |
| Steps | 0 |
| Max.Voltage.Step | 0 |
Rozkłady wartości:
# Reshape the numeric columns into long form (one row per column/value pair,
# in the default `name` / `value` columns) and draw one histogram per column,
# each with its own axis ranges.
numeric_df <- data[, numeric_columns]
numeric_df_long <- as.data.frame(
  pivot_longer(numeric_df, cols = colnames(numeric_df))
)

ggplot(data = numeric_df_long, mapping = aes(x = value)) +
  geom_histogram(alpha = 0.7, fill = "green") +
  facet_wrap(~ name, scales = "free") +
  theme_minimal()
# Correlation matrix of the numeric columns, flattened into a long table with
# one row per ordered pair (x, y) and its correlation `cor`.
numeric_df <- data[, numeric_columns]
correlation_matrix <- cor(numeric_df)
correlation_df <- correlation_matrix %>%
  as.table() %>%
  as.data.frame()
names(correlation_df) <- c("x", "y", "cor")

# Keep each unordered pair once (and drop self-pairs) by retaining only rows
# whose x name sorts strictly before the y name.
correlation_df_one_dir <- correlation_df[
  with(correlation_df, as.character(x) < as.character(y)),
]
Korelacja wszystkich par zmiennych numerycznych
# Table of all variable pairs, strongest absolute correlation first.
correlation_df_one_dir[order(-abs(correlation_df_one_dir$cor)), ] %>%
  knitr::kable()
| | x | y | cor |
|---|---|---|---|
| 65 | Gravimetric.Energy | Volumetric.Energy | 0.9283253 |
| 39 | Gravimetric.Capacity | Volumetric.Capacity | 0.8584163 |
| 117 | Stability.Charge | Stability.Discharge | 0.8028701 |
| 32 | Atomic.Fraction.Discharge | Gravimetric.Capacity | 0.6807716 |
| 50 | Average.Voltage | Gravimetric.Energy | 0.6656523 |
| 44 | Atomic.Fraction.Discharge | Volumetric.Capacity | 0.6180186 |
| 91 | Atomic.Fraction.Charge | Atomic.Fraction.Discharge | 0.5974157 |
| 62 | Average.Voltage | Volumetric.Energy | 0.5545191 |
| 132 | Max.Voltage.Step | Steps | 0.5352539 |
| 3 | Gravimetric.Capacity | Max.Delta.Volume | 0.4337733 |
| 137 | Gravimetric.Energy | Max.Voltage.Step | 0.3292322 |
| 64 | Volumetric.Capacity | Volumetric.Energy | 0.3257482 |
| 125 | Gravimetric.Energy | Steps | 0.2946075 |
| 8 | Atomic.Fraction.Discharge | Max.Delta.Volume | 0.2906921 |
| 72 | Max.Voltage.Step | Volumetric.Energy | 0.2526625 |
| 37 | Max.Delta.Volume | Volumetric.Capacity | 0.2424769 |
| 71 | Steps | Volumetric.Energy | 0.2381420 |
| 63 | Gravimetric.Capacity | Volumetric.Energy | 0.2304216 |
| 51 | Gravimetric.Capacity | Gravimetric.Energy | 0.2132463 |
| 38 | Average.Voltage | Volumetric.Capacity | -0.2128178 |
| 41 | Gravimetric.Energy | Volumetric.Capacity | 0.2098406 |
| 69 | Stability.Charge | Volumetric.Energy | 0.1783271 |
| 20 | Atomic.Fraction.Discharge | Average.Voltage | -0.1716903 |
| 101 | Gravimetric.Energy | Stability.Charge | 0.1669819 |
| 98 | Average.Voltage | Stability.Charge | 0.1661371 |
| 128 | Atomic.Fraction.Discharge | Steps | 0.1641713 |
| 67 | Atomic.Fraction.Charge | Volumetric.Energy | -0.1473523 |
| 26 | Average.Voltage | Gravimetric.Capacity | -0.1462222 |
| 123 | Gravimetric.Capacity | Steps | 0.1333977 |
| 31 | Atomic.Fraction.Charge | Gravimetric.Capacity | 0.1289210 |
| 110 | Average.Voltage | Stability.Discharge | -0.1284568 |
| 134 | Average.Voltage | Max.Voltage.Step | 0.1271208 |
| 47 | Steps | Volumetric.Capacity | 0.1037051 |
| 140 | Atomic.Fraction.Discharge | Max.Voltage.Step | 0.1019796 |
| 45 | Stability.Charge | Volumetric.Capacity | 0.1015305 |
| 55 | Atomic.Fraction.Charge | Gravimetric.Energy | -0.0972924 |
| 135 | Gravimetric.Capacity | Max.Voltage.Step | 0.0951906 |
| 108 | Max.Voltage.Step | Stability.Charge | 0.0940466 |
| 2 | Average.Voltage | Max.Delta.Volume | -0.0823707 |
| 113 | Gravimetric.Energy | Stability.Discharge | -0.0782609 |
| 56 | Atomic.Fraction.Discharge | Gravimetric.Energy | 0.0645248 |
| 99 | Gravimetric.Capacity | Stability.Charge | 0.0633871 |
| 130 | Stability.Discharge | Steps | -0.0631686 |
| 122 | Average.Voltage | Steps | 0.0627851 |
| 48 | Max.Voltage.Step | Volumetric.Capacity | 0.0626085 |
| 68 | Atomic.Fraction.Discharge | Volumetric.Energy | 0.0610586 |
| 5 | Gravimetric.Energy | Max.Delta.Volume | -0.0609858 |
| 70 | Stability.Discharge | Volumetric.Energy | -0.0599949 |
| 61 | Max.Delta.Volume | Volumetric.Energy | -0.0588321 |
| 115 | Atomic.Fraction.Charge | Stability.Discharge | -0.0523971 |
| 19 | Atomic.Fraction.Charge | Average.Voltage | -0.0385556 |
| 129 | Stability.Charge | Steps | -0.0374860 |
| 97 | Max.Delta.Volume | Stability.Charge | 0.0337587 |
| 104 | Atomic.Fraction.Discharge | Stability.Charge | 0.0324051 |
| 46 | Stability.Discharge | Volumetric.Capacity | 0.0317012 |
| 127 | Atomic.Fraction.Charge | Steps | 0.0297369 |
| 103 | Atomic.Fraction.Charge | Stability.Charge | -0.0273571 |
| 7 | Atomic.Fraction.Charge | Max.Delta.Volume | 0.0213153 |
| 120 | Max.Voltage.Step | Stability.Discharge | -0.0165552 |
| 116 | Atomic.Fraction.Discharge | Stability.Discharge | 0.0143204 |
| 121 | Max.Delta.Volume | Steps | -0.0132582 |
| 111 | Gravimetric.Capacity | Stability.Discharge | 0.0125390 |
| 133 | Max.Delta.Volume | Max.Voltage.Step | -0.0099251 |
| 109 | Max.Delta.Volume | Stability.Discharge | 0.0077357 |
| 139 | Atomic.Fraction.Charge | Max.Voltage.Step | 0.0053420 |
| 43 | Atomic.Fraction.Charge | Volumetric.Capacity | 0.0012456 |
# Interactive heat map of the absolute correlation between every pair of
# numeric variables; hovering a tile shows the pair and its absolute
# correlation.
p <- ggplot(correlation_df) +
  geom_tile(aes(
    x = x,
    y = y,
    fill = abs(cor),
    # `text` is not a geom_tile aesthetic; it is only consumed by ggplotly()
    # as the tooltip content.
    text = paste("Korelacja pomiędzy", x, "i", y, "=", abs(cor))
  )) +
  labs(fill = "Korelacja") +
  scale_fill_gradient(low = "white", high = "green") +
  # theme_minimal() is a complete theme and replaces any earlier theme()
  # settings, so the axis-title tweak has to come after it to take effect.
  theme_minimal() +
  theme(axis.title = element_blank())

ggplotly(p, tooltip = "text") %>%
  layout(
    xaxis = list(
      tickangle = 45,
      title = ""
    ),
    yaxis = list(
      title = ""
    )
  )
Przedstawienie zależności 5 par zmiennych o najwyższej korelacji
# The five variable pairs with the largest absolute correlation.
top_5_correlation <- correlation_df_one_dir %>%
  arrange(desc(abs(cor))) %>%
  slice(seq_len(5))

knitr::kable(top_5_correlation)
| x | y | cor |
|---|---|---|
| Gravimetric.Energy | Volumetric.Energy | 0.9283253 |
| Gravimetric.Capacity | Volumetric.Capacity | 0.8584163 |
| Stability.Charge | Stability.Discharge | 0.8028701 |
| Atomic.Fraction.Discharge | Gravimetric.Capacity | 0.6807716 |
| Average.Voltage | Gravimetric.Energy | 0.6656523 |
# Interactive scatter plot of Gravimetric.Energy against Volumetric.Energy
# with a linear-model trend line; hovering a point shows the battery ID and
# both plotted values.
ggplotly(
  ggplot(data, aes(x = Gravimetric.Energy, y = Volumetric.Energy)) +
    # x and y are inherited from ggplot(); only the tooltip text is added here.
    geom_point(aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nGravimetric.Energy:", Gravimetric.Energy,
        # The colon was missing here, unlike every other tooltip label.
        "\nVolumetric.Energy:", Volumetric.Energy
      )
    )) +
    geom_smooth(method = lm) +
    labs(title = "Gravimetric.Energy i Volumetric.Energy") +
    theme_minimal(),
  tooltip = "text"
)
# Interactive scatter plot of Gravimetric.Capacity against Volumetric.Capacity
# with a linear-model trend line; the hover text lists the battery ID and both
# plotted values.
(
  ggplot(data, aes(x = Gravimetric.Capacity, y = Volumetric.Capacity)) +
    geom_point(aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nGravimetric.Capacity:", Gravimetric.Capacity,
        "\nVolumetric.Capacity:", Volumetric.Capacity
      )
    )) +
    geom_smooth(method = "lm") +
    labs(title = "Gravimetric.Capacity i Volumetric.Capacity") +
    theme_minimal()
) %>%
  ggplotly(tooltip = "text")
# Interactive scatter plot of Stability.Charge against Stability.Discharge
# with a linear-model trend line; the hover text lists the battery ID and both
# plotted values.
(
  ggplot(data, aes(x = Stability.Charge, y = Stability.Discharge)) +
    geom_point(aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nStability.Charge:", Stability.Charge,
        "\nStability.Discharge:", Stability.Discharge
      )
    )) +
    geom_smooth(method = "lm") +
    labs(title = "Stability.Charge i Stability.Discharge") +
    theme_minimal()
) %>%
  ggplotly(tooltip = "text")
# Interactive scatter plot of Atomic.Fraction.Discharge against
# Gravimetric.Capacity with a linear-model trend line; the hover text lists the
# battery ID and both plotted values.
(
  ggplot(data, aes(x = Atomic.Fraction.Discharge, y = Gravimetric.Capacity)) +
    geom_point(aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nAtomic.Fraction.Discharge:", Atomic.Fraction.Discharge,
        "\nGravimetric.Capacity:", Gravimetric.Capacity
      )
    )) +
    geom_smooth(method = "lm") +
    labs(title = "Atomic.Fraction.Discharge i Gravimetric.Capacity") +
    theme_minimal()
) %>%
  ggplotly(tooltip = "text")
# Interactive scatter plot of Average.Voltage against Gravimetric.Energy with a
# linear-model trend line; the hover text lists the battery ID and both plotted
# values.
(
  ggplot(data, aes(x = Average.Voltage, y = Gravimetric.Energy)) +
    geom_point(aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nAverage.Voltage:", Average.Voltage,
        "\nGravimetric.Energy:", Gravimetric.Energy
      )
    )) +
    geom_smooth(method = "lm") +
    labs(title = "Average.Voltage i Gravimetric.Energy") +
    theme_minimal()
) %>%
  ggplotly(tooltip = "text")